# select all the top performing perturbation combinations

args = commandArgs(TRUE)
cutoff = args[1]
best = args[2]

# read sorted perturbations
infile=sprintf("cutoff%s_all_ranked_perturbations.txt",cutoff)
if(	file.exists(infile)) {
	perts = read.table(infile,header=TRUE,row.names=1,stringsAsFactors=FALSE)
} else {
	stop("Perturbations file not found")
}
perts = perts[!is.na(perts$Names),]
r_perts = perts[order(perts$Flipped_genes,decreasing=TRUE),] #order by %flipping
r_perts = r_perts[perts$Names != "",]
if(nrow(r_perts) == 0) {
	stop("Perturbations were not flipping anything")
}
print(sprintf("Total perturbations: %i",nrow(r_perts)))

# select the best perturbation that is not only the sum of single perturbations

keep_pert = NULL
prev_flip = 20000
counter = 0

for (perturbation in rownames(r_perts)) {
	found=FALSE
	singles = unlist(lapply(perturbation, function(x) strsplit(x,";")[[1]]))
	p_nodes = unlist(lapply(singles, function(x) strsplit(x,"_")[[1]][1]))
	if(length(singles) ==1){ # unique perturbation
		found=TRUE
	} else { # combination
		flipped = strsplit(r_perts[perturbation,"Names"],";")[[1]]
		singles_flipped = unique(unlist(strsplit(r_perts[singles,"Names"],";")))
		singles_flipped = singles_flipped[!is.na(singles_flipped)]
		#if the combined perturbation has some flipped genes unique to it, use it
		if(length(flipped) >= length(singles_flipped) & length(setdiff(flipped,singles_flipped))>0){ 
			found=TRUE
		}
	}
	
	if(!found) next
	
	keep_pert = c(keep_pert,perturbation)
	if (r_perts[perturbation,"Flipped_genes"] < prev_flip) {
		prev_flip = r_perts[perturbation,"Flipped_genes"]
		counter= counter+1
	}
	
	if (counter == as.integer(best)+1) { #keep the best top flipping scores (all combinations with such flipping)
		keep_pert = keep_pert[-length(keep_pert)]
		break
	} 
}

print(sprintf("Selected perturbations: %i",length(keep_pert)))

write.table(r_perts[keep_pert,],sprintf("bestcomb%s_perturbations-cutoff%s.txt",best,cutoff),col.names=TRUE,row.names=TRUE,sep="\t")
